library(tidyverse)
library(readxl)
library(patchwork)
library(kableExtra)
library(knitr)
library(ranger)
library(vip)

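## | Pitch     | Horizontal | Vertical | Pitch Proportion | Spin Rate |
## |-----------|------------|----------|------------------|-----------|
## | 4-Seamer  | 7.45       | 14.86    | 0.46             | 2285.29   |
## | Changeup  | 14.03      | 32.27    | 0.26             | 1754.87   |
## | Curveball | 9.45       | 53.35    | 0.26             | 2572.18   |
## | Cutter    | 2.88       | 25.97    | 0.34             | 2380.57   |
## | Sinker    | 15.00      | 22.89    | 0.39             | 2127.16   |
## | Slider    | 6.42       | 36.28    | 0.34             | 2432.44   |
## | Splitter  | 11.71      | 33.09    | 0.28             | 1459.77   |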

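## Pitch Classifications for Stuff+
##
## | Class | Stf+ Range |
## |-------|------------|
## | 1     | < 55       |
## | 2     | < 70       |
## | 3     | < 85       |
## | 4     | < 95       |
## | 5     | < 105      |
## | 6     | < 115      |
## | 7     | < 130      |
## | 8     | < 145      |
## | 9     | < 160      |
## | 10    | ≥ 160      |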

# Working table: the subset of Data2 columns used by the rest of the script,
# kept in the order listed below.
# NOTE(review): Data2 is not created in this part of the script; presumably it
# is read in earlier -- confirm.
base <- select(
  Data2,
  # Name, year and the contiguous pitch_hand:Stuff range
  Name, year, pitch_hand:Stuff, `Stf+ Pitch`,
  # individual pitch measurement and usage columns
  pitcher_break_z, pitcher_break_x, avg_speed, spin_rate,
  pitches_thrown, pitch_per,
  # two more contiguous column ranges
  run_value:hard_hit_percent,
  fb_type:speed_diff
)


# Modelling frame passed to the random forest: complete rows only, with the
# leading columns and several result columns removed.
tree_input <- base %>%
  # Keep rows where none of these seven fields is missing.
  drop_na(
    `Stf+ Pitch`, run_value, spin_rate,
    fb_stuff, fb_speed, fb_thrown, speed_diff
  ) %>%
  # Drop every column from Name through pitch_type_name, plus fb_type.
  select(-(Name:pitch_type_name), -fb_type) %>%
  # Give the Stuff+ column a syntactic name so it needs no backticks later.
  rename(pitch_stuff = `Stf+ Pitch`) %>%
  # Remove these result columns before modelling.
  # NOTE(review): presumably dropped so they cannot leak the woba target --
  # confirm.
  select(
    -c(est_woba, slg, ba, run_value_per_100, run_value, est_slg, est_ba)
  )

# Random forest with woba as the response and every other column of
# tree_input as a predictor: 1000 trees, impurity-based variable importance.
# NOTE(review): no seed is set in the visible code, so the fit is only
# reproducible if set.seed() was called earlier -- confirm.
mlb_rf <- ranger(
  woba ~ .,
  data = tree_input,
  num.trees = 1000,
  importance = "impurity"
)

# Show the fitted forest's summary. The `##` lines below appear to be the
# output recorded from a previous run, kept for reference.
print(mlb_rf)
## Ranger result
## 
## Call:
##  ranger(woba ~ ., data = tree_input, num.trees = 1000, importance = "impurity") 
## 
## Type:                             Regression 
## Number of trees:                  1000 
## Sample size:                      2074 
## Number of independent variables:  18 
## Mtry:                             4 
## Target node size:                 5 
## Variable importance mode:         impurity 
## Splitrule:                        variance 
## OOB prediction error (MSE):       0.002031904 
## R squared (OOB):                  0.5914404
# Variable-importance plot for the fitted forest, drawn as points with the
# black-and-white ggplot2 theme.
vip(
  mlb_rf,
  geom = "point"
) +
  theme_bw()